home *** CD-ROM | disk | FTP | other *** search
- #! /usr/bin/python
-
- """
- A command-line interface to the validating xmlproc parser. Prints error
- messages and can output the parsed data in various formats.
- """
-
# Help text printed by print_usage() after the error message. The string
# deliberately starts with a newline so it is set off from that message.
# Wrapped description lines are indented under their option so they cannot be
# mistaken for separate options.
usage = """
Usage:

  xvcmd.py [options] [urlstodocs]

  ---Options:
  -c catalog:   path to catalog file to use to resolve public identifiers
  -l language:  ISO 639 language code for language to use in error messages
  -o format:    Format to output parsed XML. 'e': ESIS, 'x': canonical XML
                and 'n': normalized XML. No data will be output if this
                option is not specified.
  urlstodocs:   URLs to the documents to parse. (You can use plain file names
                as well.) Can be omitted if a catalog is specified and contains
                a DOCUMENT entry.
  -n:           Report qualified names as 'URI name'. (Namespace processing.)
  --nowarn:     Suppress warnings.
  --entstck:    Show entity stack on errors.
  --rawxml:     Show raw XML string where error occurred.

  Catalog files with URLs that end in '.xml' are assumed to be XCatalogs,
  all others are assumed to be SGML Open Catalogs.

  If the -c option is not specified the environment variables XMLXCATALOG
  and XMLSOCATALOG will be used (in that order).
"""
-
- from xml.parsers.xmlproc import xmlval,catalog,xcatalog,xmlproc,_outputters
- import sys, getopt, os, string
-
- # --- Utilities
-
def print_usage(message):
    """Print *message* followed by the usage text, then exit with status 1.

    Both texts go to standard output. The function never returns normally:
    it finishes by calling sys.exit(1), which raises SystemExit.
    """
    for text in (message, usage):
        print(text)
    sys.exit(1)
-
- # --- Initialization
-
# Announce which xmlproc version is in use, then create the validating parser
# that the rest of the script configures and runs.
print("xmlproc version %s" % xmlval.version)

p = xmlval.XMLValidator()
-
- # --- Interpreting options
-
# Split the command line into (option, value) pairs and the remaining
# positional arguments (the system identifiers of the documents to parse).
# A malformed command line prints the usage text and exits.
try:
    (options, sysids) = getopt.getopt(sys.argv[1:], "c:l:o:n",
                                      ["nowarn", "entstck", "rawxml"])
except getopt.error as e:
    print_usage("Usage error: " + e.msg)

# Defaults; each may be overridden by an option below.
warnings = 1        # report warnings unless --nowarn is given
entstack = 0        # --entstck: show entity stack on errors
rawxml = 0          # --rawxml: show raw XML where an error occurred
cat = None          # -c: catalog location
pf = None           # catalog parser factory, chosen later from `cat`
namespaces = 0      # -n: namespace processing
app = xmlproc.Application()   # replaced by an outputter when -o is given
err_lang = None     # -l: language for error messages

for (opt_name, opt_value) in options:
    if opt_name == "-c":
        cat = opt_value
    elif opt_name == "-l":
        # An unavailable language is reported but is not fatal: the parser
        # keeps its current language and err_lang stays unset.
        try:
            p.set_error_language(opt_value)
            err_lang = opt_value
        except KeyError:
            print("Error: Language '%s' not available" % opt_value)
    elif opt_name == "-o":
        # The format letter is case-insensitive.
        output_format = opt_value.lower()
        if output_format == "e":
            app = _outputters.ESISDocHandler()
        elif output_format == "x":
            app = _outputters.Canonizer()
        elif output_format == "n":
            app = _outputters.DocGenerator()
        else:
            print_usage("Error: Unknown output format " + opt_value)
    elif opt_name == "-n":
        namespaces = 1
    elif opt_name == "--nowarn":
        warnings = 0
    elif opt_name == "--entstck":
        entstack = 1
    elif opt_name == "--rawxml":
        rawxml = 1
-
- # Acting on option settings
-
# Install the error handler, handing it the warning / entity-stack / raw-XML
# switches collected from the command line.
err = _outputters.MyErrorHandler(p, p.parser, warnings, entstack, rawxml)
p.set_error_handler(err)

# With namespace processing enabled, a NamespaceFilter sits between the parser
# and the application; otherwise the application is attached directly.
if namespaces:
    from xml.parsers.xmlproc import namespace

    nsf = namespace.NamespaceFilter(p)
    nsf.set_application(app)
    p.set_application(nsf)
else:
    p.set_application(app)

# Choose the catalog and a matching catalog parser factory. An explicitly
# given catalog wins; otherwise fall back to the XMLXCATALOG and then the
# XMLSOCATALOG environment variable.
if cat is not None:
    pf = xcatalog.FancyParserFactory(err_lang)
elif "XMLXCATALOG" in os.environ:
    cat = os.environ["XMLXCATALOG"]
    pf = xcatalog.XCatParserFactory(err_lang)
elif "XMLSOCATALOG" in os.environ:
    cat = os.environ["XMLSOCATALOG"]
    pf = catalog.CatParserFactory(err_lang)

if cat is not None:
    print("Parsing catalog file '%s'" % cat)
    # From here on `cat` is the catalog object, no longer its location.
    cat = catalog.xmlproc_catalog(cat, pf, err)
    p.set_pubid_resolver(cat)

# Without documents on the command line, fall back to the catalog's DOCUMENT
# entry. print_usage() exits, so the code after each call only runs when the
# corresponding check passed.
if not sysids:
    if cat is None:
        print_usage("You must specify a system identifier if no catalog is "
                    "used")

    document_sysid = cat.get_document_sysid()
    if document_sysid is None:
        print_usage("You must specify a system identifier if the catalog has "
                    "no DOCUMENT entry")

    sysids = [document_sysid]
    print("Parsing DOCUMENT '%s' from catalog" % sysids[0])
-
- # --- Parsing
-
# Parse each document in turn. After every document an error tally is printed
# (plus a warning tally unless warnings are suppressed), and the error handler
# and parser are reset before the next one.
for sysid in sysids:
    print("")
    print("Parsing '%s'" % sysid)
    p.parse_resource(sysid)
    print("")

    summary = "Parse complete, %d error(s)" % err.errors
    if warnings:
        summary = summary + " and %d warning(s)" % err.warnings
    print(summary)

    err.reset()
    p.reset()
-